import os 
import logging

# Database file names.
# Each annotation reads: processing stage -> storage kind, followed by the
# record layout written as "key | value".

# pre processing -> binary storage
#   src id | list of dest id
db_webgraph = "db.webgraph"

# pre processing -> binary storage
#   src id | start byte position in db.webgraph
db_webgraphindex = "db.webgraph-index"

# pre processing -> bsddb
#   url | id
db_urlid = "db.url-id"

# pre processing -> bsddb
#   id | url
db_idurl = "db.id-url"

# pre processing -> bsddb
#   id | title text
db_title = "db.title"

# post processing -> binary storage
#   dest id | list of src id
db_tpgraph = "db.tpgraph"


# PATH
# The project root is taken to be the parent of the current working directory.
# os.path.dirname is used instead of splitting on '/' so that the result stays
# absolute when the working directory sits directly under the filesystem root
# (the split/join form produced '' there) and so that the platform's own path
# separator is honoured.
dirProject = os.path.dirname(os.getcwd())

# Input data directory, resolved against the project root.  The tuple lists the
# known data sets and the trailing index selects the active one; edit the index
# to switch data sets.
dirInput = os.path.join(dirProject,
    ('',
     'data/1000/',
     'data/wbCt100Sep08/',
     'data/1/',
     'data/big/',
    )[4]    # currently 'data/big/'
)

# Marker string used as the document separator.
docSeperator = '==P=>>>>=i===<<<<=T===>=A===<=!Junghoo!==>'

# Content types treated as valid.
validContentTypes = (
    'text/html',
    'text',
    'text/plain',
    'application/xhtml+xml',
)

# maximum tar .gz
# NOTE(review): presumably the upper bound on host archives (.tar.gz) to
# handle -- confirm against the code that reads this value.
maximumHost = 100000

def _remove_stale_outputs():
    """Delete ``db.*`` files and ``log`` from the current working directory.

    Replaces the former ``rm db.* log`` shell call.  The same files are
    targeted (names starting with ``db.`` plus the exact name ``log``), but no
    shell or external ``rm`` binary is needed, and nothing is reported when
    there is simply nothing to delete.

    Removal is best effort: an entry that cannot be removed (for example a
    directory or a permission problem) is logged as a warning and skipped,
    so importing this module never fails because of the cleanup.
    """
    log = logging.getLogger(__name__)
    try:
        entries = os.listdir(os.curdir)
    except OSError as exc:
        log.warning('cannot list working directory for cleanup: %s', exc)
        return
    for entry in entries:
        if entry.startswith('db.') or entry == 'log':
            try:
                os.remove(entry)
            except OSError as exc:
                log.warning('cannot remove %s: %s', entry, exc)


# Runs at import time, as the original shell command did.
_remove_stale_outputs()

